% Read in Largest 100 Cities, 1900, 1940, 1980 and 2020

% Start from an empty workspace (also clears globals and loaded functions)
clear all;

% -- File Directories
% datadir is declared global before it is assigned, so the value set here
% is visible to any function that also declares `global datadir`.
global datadir;
datadir = '../Data/CitySize/';
standir = '../Stan/CitySize/';
matdir  = 'mat/';
figdir  = 'fig/';
outdir  = 'out/';

% -- Numerical constants
% NOTE(review): none of these three is referenced in the visible part of
% this script; they are kept because they remain in the workspace.
big      = 1e8;
pinv_tol = 1e-5;
small    = 1e-10;

% Read in Data
% Rows 2-101 of spreadsheet columns C, E, G and I are read into columns
% 1-4 of cs_100_data (100 rows each).
% NOTE(review): presumably these four columns are the 1900, 1940, 1980 and
% 2020 city populations, in that order -- confirm against the workbook.
% yr is not read from the workbook here because it is assigned explicitly
% further down in this script.
str_data = [datadir 'USCities_100.xlsx'];
pop_cols = {'C','E','G','I'};
cs_100_data = NaN(100,numel(pop_cols));
for j = 1:numel(pop_cols)
    range_j = [pop_cols{j} '2:' pop_cols{j} '101'];   % e.g. 'C2:C101'
    cs_100_data(:,j) = readmatrix(str_data,'Range',range_j);
end

% Aggregate population, one entry per column of cs_100_data (same order)
pop_agg = [76212168; 132164569; 226545805; 331449281];

% Express each entry as a percentage of that column's aggregate population.
% The 1 x 4 row pop_agg' is expanded implicitly across the 100 rows.
cs_100_data = cs_100_data./pop_agg';
cs_100_data = 100*cs_100_data;

% Year labels as a 4 x 1 column vector
yr = [1900; 1940; 1980; 2020];

% Put years in rows: cs_data is 4 x 100
cs_data = cs_100_data.';

% Save Data
% Create the output folders if they are missing, so that save and
% writematrix do not fail when the script runs in a fresh checkout.
if ~isfolder(matdir)
    mkdir(matdir);
end
if ~isfolder(standir)
    mkdir(standir);
end

% Full panel (yr and the 4 x 100 cs_data) as a MAT-file
str_save = [matdir 'CitySize_100.mat'];
save(str_save,'yr','cs_data');

% Save the first 30 columns of cs_data (30 cities) as an Excel file.
% NOTE(review): these are the 30 largest cities only if the workbook rows
% are ordered by size -- confirm. Only the 30-city file is written here.
str_save = [standir 'CitySize_Data_30.xlsx'];
writematrix(cs_data(:,1:30),str_save);
